from lxml import  etree

# html_str = """
# <html>
#   <body>
#     <h1>标题</h1>
#     <ul>
#       <li class="item">项目1</li>
#       <li class="item">项目2</li>
#     </ul>
#   </body>
# </html>
# """
#
# tree = etree.HTML(html_str)
# print(tree.xpath('//li[@class="item"]'))
# print(tree.tag,tree.attrib)

# xml_str = """
# <root>
#   <person id="1">
#     <name>张三</name>
#     <age>30</age>
#   </person>
#   <person id="2">
#     <name>李四</name>
#     <age>25</age>
#   </person>
# </root>"""
#
# root = etree.XML(xml_str)
# for person in root.iter('person'):
#     pid = person.get('id')
#     name = person.find('name').text
#     age = person.find('age').text
#     print(f"ID: {pid}, 姓名: {name}, 年龄: {age}")

# Sample HTML document used as the fixture for the selector experiments in
# this script: one <h1>, two <ul> lists holding two <li> items each, and one
# <ol> whose three <li> items each wrap an <a> link.
html_str = """
<html>
  <body>
    <h1>标题</h1>
    <ul>
      <li class="item1">项目1</li>
      <li class="item2">项目2</li>
    </ul>
    <ul>
      <li class="item11">项目1</li>
      <li class="item22">项目2</li>
    </ul>
    <ol>
        <li class="1"><a href="https://www.baidu.com">百度</a></li>
        <li class="2"><a href="https://www.baidu.com">百度</a></li>
        <li class="3"><a href="https://www.baidu.com">百度</a></li>
    </ol> 
  </body>
</html>
"""
# Parse the markup with lxml's HTML parser; `tree` is the object that the
# XPath queries in this script are run against.
tree = etree.HTML(html_str)

# lis = tree.cssselect('li')
# for li in lis:
#     print(li.text,li.attrib,li.tag)
# print("================")
# lis = tree.cssselect('ol li')
# for li in lis:
#     print(li.text,li.attrib,li.tag)
#     a = li.cssselect('a')
#     print(type(a))
#     print(a[0].text,a[0].attrib,a[0].tag)

# lis = tree.xpath('//li[@class="1"]/a[@href]')
# for li in lis:
#     print(li.text,li.attrib,li.tag)

# lis = tree.xpath('//ul/*[1]')
# for li in lis:
#     print(li.text,li.attrib,li.tag)
#
# lis = tree.xpath('//ul/*[last()]')
# for li in lis:
#     print(li.text,li.attrib,li.tag)

# XPath: match every element, at any depth, that is not the first element
# child of its parent (position() counts among the siblings matched by `*`).
for element in tree.xpath('//*[position()>1]'):
    # Print the element's text, its attribute mapping and its tag name.
    print(element.text, element.attrib, element.tag)

